This material contain the microbial analysis of full scale wastewater treatment plants based on 16S rRNA gene amplicon sequencing.The microbial community has been sequenced using primers covering the V1-V3 region of the 16S rRNA gene. (27F (AGAGTTTGATCCTGGCTCAG, Lane 1991) and 534R (ATTACCGCGGCTGCTGG, Muyzer et al, 1993)). See Kristensen et al. 2020: “Bacteria from the genus Arcobacter are abundant in effluent from wastewater treatment plants” for further details. Raw sequencing data is available in the European Nucleotide Archive with the project ID [PRJEB28796].
The data analysis is primarily performed using the ampvis2 and mmgenome2 packages, but other packages are also used.
library(ampvis2)
library(cowplot)
library(plyr)
library(ggrepel)
library(plotly)
library(gridExtra)
library(grid)
library("vegan")
myotutable <- read.csv("ASVtable_MIDAS.tsv", sep = "\t", check.names = FALSE)
mymetadata <- read.csv("metadataanonymised.txt", sep = "\t", check.names = FALSE)
metadatanumbers <- read.delim(file = "metadatanumberswithoutbadsamplessimplifiedanonymised.txt", header = T, sep = "\t")
d <- amp_load(otutable = myotutable,
metadata = mymetadata)
dnumbers <- amp_load(otutable = data.table::fread("ASVtable_MIDAS.tsv"), metadata = metadatanumbers)
dnumbers <- amp_subset_samples(dnumbers, Type == "Sample")
d <- amp_subset_samples(d, Type == "Sample")
dcontrols <- amp_load(otutable = data.table::fread("ASVtable_MIDAS.tsv"), metadata = metadatanumbers)
dcontrols <- amp_subset_samples(dcontrols, Type == "Control")
Microbial community structure of influent, process tank and effluent of 14 full-scale WWTPs over a 3 month period as determined by 16S rRNA amplicon sequencing and principal component analysis. All samples are shown (influent wastewater colored in green, process tank in blue, and effluent in red).
Before analyzing the data it is often convenient to remove the lower abundant ASVs, because they cause noise in the dataset. This can be done using the filter_taxa phyloseq function. Here we decide for the PCA to keep ASVs with an abundance of 0.1 % or more in at least one sample.
dpca <- amp_subset_samples(d, Plant %in% c("WWTP A","WWTP B", "WWTP G","WWTP D","WWTP E","WWTP F","WWTP H", "WWTP I", "WWTP J", "WWTP K", "WWTP L", "WWTP M", "WWTP N"))
dpca <- amp_subset_samples(dpca, Location %in% c("Influent", "Process tank", "Effluent"))
drpca <- amp_subset_samples(dpca, minreads = 10000)
fig1a <- amp_ordinate(drpca,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 5,
sample_label_size = 5)+theme_cowplot()+theme(legend.position="none")
fig1a
ggsave(filename = "fig1a.pdf",plot = fig1a,width = 7,height = 7)
ggsave(filename = "fig1a.png",plot = fig1a,width = 7,height = 7)
ggsave(filename = "fig1a.tiff",plot = fig1a,width = 5,height = 5)
The adonis function from the Vegan package has been used to test significance between the different sample types grouped for each plant to test if sampling locations are significantly different from each other
asvtable.samples.bray <- read.csv("ASVtable_MIDAS_bray.tsv.txt", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samples <- read.csv("metadataanonymised_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.bray)
## [1] 47649 317
dim(metadata.samples)
## [1] 317 14
t <- t(asvtable.samples.bray)
braycurtis.d=vegdist(t(asvtable.samples.bray), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samples)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samples)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 2 19.603 9.8014 37.07 0.19101 0.001 ***
## Residuals 314 83.023 0.2644 0.80899
## Total 316 102.626 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test show that sampling locations are significantly different from each other
dpcaA <- amp_subset_samples(d, Plant %in% c("WWTP A"))
## 376 samples and 17202 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 36 samples and 29908 OTUs
dpcaA <- amp_subset_samples(dpcaA, Location %in% c("Influent", "Process tank", "Effluent"))
## 6 samples and 7070 OTUs have been filtered
## Before: 36 samples and 29908 OTUs
## After: 30 samples and 22838 OTUs
drpcaA <- amp_subset_samples(dpcaA, minreads = 10000)
## 0 samples have been filtered.
amp_ordinate(drpcaA,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayA <- read.csv("statistics/A/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesA <- read.csv("statistics/A/infeff/metadataanonymised_A_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayA)
## [1] 2130 20
dim(metadata.samplesA)
## [1] 20 14
t <- t(asvtable.samples.brayA)
braycurtis.d=vegdist(t(asvtable.samples.brayA), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesA)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesA)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.0184 1.01844 15.566 0.46375 0.001 ***
## Residuals 18 1.1777 0.06543 0.53625
## Total 19 2.1961 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayA <- read.csv("statistics/A/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesA <- read.csv("statistics/A/proeff/metadataanonymised_A_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayA)
## [1] 2087 20
dim(metadata.samplesA)
## [1] 20 14
t <- t(asvtable.samples.brayA)
braycurtis.d=vegdist(t(asvtable.samples.brayA), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesA)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesA)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 2.04836 2.0484 38.578 0.68186 0.001 ***
## Residuals 18 0.95573 0.0531 0.31814
## Total 19 3.00409 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant A has long distance to both influent and process tank suggesting community is a mixture of both
dpcaB <- amp_subset_samples(d, Plant %in% c("WWTP B"))
## 372 samples and 13119 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 40 samples and 33991 OTUs
dpcaB <- amp_subset_samples(dpcaB, Location %in% c("Influent", "Process tank", "Effluent"))
## 4 samples and 3813 OTUs have been filtered
## Before: 40 samples and 33991 OTUs
## After: 36 samples and 30178 OTUs
drpcaB <- amp_subset_samples(dpcaB, minreads = 10000)
## 0 samples have been filtered.
amp_ordinate(drpcaB,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayB <- read.csv("statistics/B/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesB <- read.csv("statistics/B/infeff/metadataanonymised_B_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayB)
## [1] 3322 24
dim(metadata.samplesB)
## [1] 24 14
t <- t(asvtable.samples.brayB)
braycurtis.d=vegdist(t(asvtable.samples.brayB), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesB)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesB)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.7933 1.79332 17.723 0.44617 0.001 ***
## Residuals 22 2.2261 0.10119 0.55383
## Total 23 4.0194 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayB <- read.csv("statistics/B/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesB <- read.csv("statistics/B/proeff/metadataanonymised_B_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayB)
## [1] 3575 24
dim(metadata.samplesB)
## [1] 24 14
t <- t(asvtable.samples.brayB)
braycurtis.d=vegdist(t(asvtable.samples.brayB), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesB)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesB)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.1354 1.1354 11.231 0.33796 0.001 ***
## Residuals 22 2.2241 0.1011 0.66204
## Total 23 3.3595 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant B has long distance to both influent and process tank suggesting community is a mixture of both
dpcaC <- amp_subset_samples(d, Plant %in% c("WWTP C"))
## 371 samples and 27609 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 41 samples and 19501 OTUs
dpcaC <- amp_subset_samples(dpcaC, Location %in% c("Influent", "Process tank", "Effluent"))
## 5 samples and 3966 OTUs have been filtered
## Before: 41 samples and 19501 OTUs
## After: 36 samples and 15535 OTUs
drpcaC <- amp_subset_samples(dpcaC, minreads = 10000)
## 1 samples and 38 OTUs have been filtered
## Before: 36 samples and 15535 OTUs
## After: 35 samples and 15497 OTUs
amp_ordinate(drpcaC,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayC <- read.csv("statistics/C/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesC <- read.csv("statistics/C/infeff/metadataanonymised_C_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayC)
## [1] 1584 24
dim(metadata.samplesC)
## [1] 24 14
t <- t(asvtable.samples.brayC)
braycurtis.d=vegdist(t(asvtable.samples.brayC), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesC)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesC)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.5508 1.55084 10.216 0.3171 0.001 ***
## Residuals 22 3.3398 0.15181 0.6829
## Total 23 4.8907 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayC <- read.csv("statistics/C/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesC <- read.csv("statistics/C/proeff/metadataanonymised_C_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayC)
## [1] 1502 24
dim(metadata.samplesC)
## [1] 24 14
t <- t(asvtable.samples.brayC)
braycurtis.d=vegdist(t(asvtable.samples.brayC), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesC)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesC)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.6000 1.60003 11.801 0.34913 0.001 ***
## Residuals 22 2.9829 0.13559 0.65087
## Total 23 4.5829 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant C has long distance to both influent and process tank suggesting community is a mixture of both
dpcaD <- amp_subset_samples(d, Plant %in% c("WWTP D"))
## 391 samples and 25030 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 21 samples and 22080 OTUs
dpcaD <- amp_subset_samples(dpcaD, Location %in% c("Influent", "Process tank", "Effluent"))
## 3 samples and 2769 OTUs have been filtered
## Before: 21 samples and 22080 OTUs
## After: 18 samples and 19311 OTUs
drpcaD <- amp_subset_samples(dpcaD, minreads = 10000)
## 2 samples and 88 OTUs have been filtered
## Before: 18 samples and 19311 OTUs
## After: 16 samples and 19223 OTUs
amp_ordinate(drpcaD,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayD <- read.csv("statistics/D/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesD <- read.csv("statistics/D/infeff/metadataanonymised_D_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayD)
## [1] 3282 12
dim(metadata.samplesD)
## [1] 12 14
t <- t(asvtable.samples.brayD)
braycurtis.d=vegdist(t(asvtable.samples.brayD), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesD)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesD)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.5084 1.50843 11.466 0.53414 0.001 ***
## Residuals 10 1.3156 0.13156 0.46586
## Total 11 2.8240 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayD <- read.csv("statistics/D/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesD <- read.csv("statistics/D/proeff/metadataanonymised_D_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayD)
## [1] 3291 12
dim(metadata.samplesD)
## [1] 12 14
t <- t(asvtable.samples.brayD)
braycurtis.d=vegdist(t(asvtable.samples.brayD), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesD)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesD)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.22865 0.22865 1.4391 0.1258 0.186
## Residuals 10 1.58886 0.15889 0.8742
## Total 11 1.81752 1.0000
Plant D has a longer distance to influent than process tank and the effluent community is not significantly different from process tank community suggesting community contain most process tank bacteria
dpcaE <- amp_subset_samples(d, Plant %in% c("WWTP E"))
## 383 samples and 14911 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 29 samples and 32199 OTUs
dpcaE <- amp_subset_samples(dpcaE, Location %in% c("Influent", "Process tank", "Effluent"))
## 5 samples and 6402 OTUs have been filtered
## Before: 29 samples and 32199 OTUs
## After: 24 samples and 25797 OTUs
drpcaE <- amp_subset_samples(dpcaE, minreads = 10000)
## 5 samples and 159 OTUs have been filtered
## Before: 24 samples and 25797 OTUs
## After: 19 samples and 25638 OTUs
amp_ordinate(drpcaE,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayE <- read.csv("statistics/E/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesE <- read.csv("statistics/E/infeff/metadataanonymised_E_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayE)
## [1] 5381 18
dim(metadata.samplesE)
## [1] 18 14
t <- t(asvtable.samples.brayE)
braycurtis.d=vegdist(t(asvtable.samples.brayE), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesE)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesE)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.8256 1.82557 13.975 0.46623 0.001 ***
## Residuals 16 2.0900 0.13063 0.53377
## Total 17 3.9156 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayE <- read.csv("statistics/E/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesE <- read.csv("statistics/E/proeff/metadataanonymised_E_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayE)
## [1] 2305 12
dim(metadata.samplesE)
## [1] 12 14
t <- t(asvtable.samples.brayE)
braycurtis.d=vegdist(t(asvtable.samples.brayE), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesE)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesE)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.9109 0.9109 7.4971 0.42848 0.014 *
## Residuals 10 1.2150 0.1215 0.57152
## Total 11 2.1259 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant E has long distance to both influent and process tank suggesting community is a mixture of both
dpcaF <- amp_subset_samples(d, Plant %in% c("WWTP F"))
## 389 samples and 15075 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 23 samples and 32035 OTUs
dpcaF <- amp_subset_samples(dpcaF, Location %in% c("Influent", "Process tank", "Effluent"))
## 5 samples and 5467 OTUs have been filtered
## Before: 23 samples and 32035 OTUs
## After: 18 samples and 26568 OTUs
drpcaF <- amp_subset_samples(dpcaF, minreads = 10000)
## 2 samples and 82 OTUs have been filtered
## Before: 18 samples and 26568 OTUs
## After: 16 samples and 26486 OTUs
amp_ordinate(drpcaF,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayF <- read.csv("statistics/F/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesF <- read.csv("statistics/F/infeff/metadataanonymised_F_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayF)
## [1] 4478 12
dim(metadata.samplesF)
## [1] 12 14
t <- t(asvtable.samples.brayF)
braycurtis.d=vegdist(t(asvtable.samples.brayF), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesF)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesF)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.78108 0.78108 5.655 0.36123 0.002 **
## Residuals 10 1.38120 0.13812 0.63877
## Total 11 2.16228 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayF <- read.csv("statistics/F/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesF <- read.csv("statistics/F/proeff/metadataanonymised_F_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayF)
## [1] 4224 12
dim(metadata.samplesF)
## [1] 12 14
t <- t(asvtable.samples.brayF)
braycurtis.d=vegdist(t(asvtable.samples.brayF), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesF)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesF)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.27747 0.27747 1.619 0.13934 0.203
## Residuals 10 1.71382 0.17138 0.86066
## Total 11 1.99129 1.00000
Plant F has a longer distance to influent than process tank and the effluent community is not significantly different from process tank community suggesting community contain most process tank bacteria
dpcaG <- amp_subset_samples(d, Plant %in% c("WWTP G"))
## 385 samples and 18011 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 27 samples and 29099 OTUs
dpcaG <- amp_subset_samples(dpcaG, Location %in% c("Influent", "Process tank", "Effluent"))
## 4 samples and 4728 OTUs have been filtered
## Before: 27 samples and 29099 OTUs
## After: 23 samples and 24371 OTUs
drpcaG <- amp_subset_samples(dpcaG, minreads = 10000)
## 6 samples and 183 OTUs have been filtered
## Before: 23 samples and 24371 OTUs
## After: 17 samples and 24188 OTUs
amp_ordinate(drpcaG,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayG <- read.csv("statistics/G/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesG <- read.csv("statistics/G/infeff/metadataanonymised_G_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayG)
## [1] 4087 18
dim(metadata.samplesG)
## [1] 18 14
t <- t(asvtable.samples.brayG)
braycurtis.d=vegdist(t(asvtable.samples.brayG), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesG)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesG)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.0924 1.09245 5.3625 0.25102 0.002 **
## Residuals 16 3.2595 0.20372 0.74898
## Total 17 4.3520 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayG <- read.csv("statistics/G/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesG <- read.csv("statistics/G/proeff/metadataanonymised_G_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayG)
## [1] 3326 11
dim(metadata.samplesG)
## [1] 11 14
t <- t(asvtable.samples.brayG)
braycurtis.d=vegdist(t(asvtable.samples.brayG), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesG)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesG)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.9237 0.92370 5.8943 0.39574 0.001 ***
## Residuals 9 1.4104 0.15671 0.60426
## Total 10 2.3341 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant G has long distance to both influent and process tank suggesting community is a mixture of both
dpcaH <- amp_subset_samples(d, Plant %in% c("WWTP H"))
## 385 samples and 17113 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 27 samples and 29997 OTUs
dpcaH <- amp_subset_samples(dpcaH, Location %in% c("Influent", "Process tank", "Effluent"))
## 3 samples and 9170 OTUs have been filtered
## Before: 27 samples and 29997 OTUs
## After: 24 samples and 20827 OTUs
drpcaH <- amp_subset_samples(dpcaH, minreads = 10000)
## 4 samples and 236 OTUs have been filtered
## Before: 24 samples and 20827 OTUs
## After: 20 samples and 20591 OTUs
amp_ordinate(drpcaH,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayH <- read.csv("statistics/H/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesH <- read.csv("statistics/H/infeff/metadataanonymised_H_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayH)
## [1] 2468 18
dim(metadata.samplesH)
## [1] 18 14
t <- t(asvtable.samples.brayH)
braycurtis.d=vegdist(t(asvtable.samples.brayH), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesH)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesH)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.5079 1.50790 11.314 0.41422 0.001 ***
## Residuals 16 2.1324 0.13328 0.58578
## Total 17 3.6403 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayH <- read.csv("statistics/H/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesH <- read.csv("statistics/H/proeff/metadataanonymised_H_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayH)
## [1] 2360 12
dim(metadata.samplesH)
## [1] 12 14
t <- t(asvtable.samples.brayH)
braycurtis.d=vegdist(t(asvtable.samples.brayH), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesH)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesH)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.70099 0.70099 5.8763 0.37013 0.002 **
## Residuals 10 1.19290 0.11929 0.62987
## Total 11 1.89389 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant H has long distance to both influent and process tank suggesting community is a mixture of both
dpcaI <- amp_subset_samples(d, Plant %in% c("WWTP I"))
## 389 samples and 22890 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 23 samples and 24220 OTUs
dpcaI <- amp_subset_samples(dpcaI, Location %in% c("Influent", "Process tank", "Effluent"))
## 5 samples and 7196 OTUs have been filtered
## Before: 23 samples and 24220 OTUs
## After: 18 samples and 17024 OTUs
drpcaI <- amp_subset_samples(dpcaI, minreads = 10000)
## 0 samples have been filtered.
amp_ordinate(drpcaI,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayI <- read.csv("statistics/I/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesI <- read.csv("statistics/I/infeff/metadataanonymised_I_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayI)
## [1] 2099 12
dim(metadata.samplesI)
## [1] 12 14
t <- t(asvtable.samples.brayI)
braycurtis.d=vegdist(t(asvtable.samples.brayI), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesI)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesI)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.66751 1.66751 24.404 0.70934 0.003 **
## Residuals 10 0.68328 0.06833 0.29066
## Total 11 2.35080 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayI <- read.csv("statistics/I/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesI <- read.csv("statistics/I/proeff/metadataanonymised_I_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayI)
## [1] 1784 12
dim(metadata.samplesI)
## [1] 12 14
t <- t(asvtable.samples.brayI)
braycurtis.d=vegdist(t(asvtable.samples.brayI), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesI)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesI)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.37232 0.37232 7.061 0.41387 0.002 **
## Residuals 10 0.52729 0.05273 0.58613
## Total 11 0.89961 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant I has long distance to both influent and process tank suggesting community is a mixture of both
dpcaJ <- amp_subset_samples(d, Plant %in% c("WWTP J"))
## 390 samples and 17536 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 22 samples and 29574 OTUs
dpcaJ <- amp_subset_samples(dpcaJ, Location %in% c("Influent", "Process tank", "Effluent"))
## 4 samples and 8987 OTUs have been filtered
## Before: 22 samples and 29574 OTUs
## After: 18 samples and 20587 OTUs
drpcaJ <- amp_subset_samples(dpcaJ, minreads = 10000)
## 0 samples have been filtered.
amp_ordinate(drpcaJ,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayJ <- read.csv("statistics/J/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesJ <- read.csv("statistics/J/infeff/metadataanonymised_J_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayJ)
## [1] 2073 12
dim(metadata.samplesJ)
## [1] 12 14
t <- t(asvtable.samples.brayJ)
braycurtis.d=vegdist(t(asvtable.samples.brayJ), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesJ)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesJ)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.95135 0.95135 9.4437 0.48569 0.002 **
## Residuals 10 1.00739 0.10074 0.51431
## Total 11 1.95875 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayJ <- read.csv("statistics/J/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesJ <- read.csv("statistics/J/proeff/metadataanonymised_J_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayJ)
## [1] 2395 12
dim(metadata.samplesJ)
## [1] 12 14
t <- t(asvtable.samples.brayJ)
braycurtis.d=vegdist(t(asvtable.samples.brayJ), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesJ)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesJ)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.71267 0.71267 8.1914 0.45029 0.005 **
## Residuals 10 0.87002 0.08700 0.54971
## Total 11 1.58269 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant J has long distance to both influent and process tank suggesting community is a mixture of both
dpcaK <- amp_subset_samples(d, Plant %in% c("WWTP K"))
## 389 samples and 17906 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 23 samples and 29204 OTUs
dpcaK <- amp_subset_samples(dpcaK, Location %in% c("Influent", "Process tank", "Effluent"))
## 5 samples and 8045 OTUs have been filtered
## Before: 23 samples and 29204 OTUs
## After: 18 samples and 21159 OTUs
drpcaK <- amp_subset_samples(dpcaK, minreads = 10000)
## 1 samples and 72 OTUs have been filtered
## Before: 18 samples and 21159 OTUs
## After: 17 samples and 21087 OTUs
amp_ordinate(drpcaK,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayK <- read.csv("statistics/K/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesK <- read.csv("statistics/K/infeff/metadataanonymised_K_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayK)
## [1] 2691 12
dim(metadata.samplesK)
## [1] 12 14
t <- t(asvtable.samples.brayK)
braycurtis.d=vegdist(t(asvtable.samples.brayK), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesK)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesK)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.54185 1.54185 18.379 0.64762 0.001 ***
## Residuals 10 0.83894 0.08389 0.35238
## Total 11 2.38079 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayK <- read.csv("statistics/K/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesK <- read.csv("statistics/K/proeff/metadataanonymised_K_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayK)
## [1] 2347 12
dim(metadata.samplesK)
## [1] 12 14
t <- t(asvtable.samples.brayK)
braycurtis.d=vegdist(t(asvtable.samples.brayK), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesK)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesK)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.85423 0.85423 11.89 0.54317 0.004 **
## Residuals 10 0.71846 0.07185 0.45683
## Total 11 1.57269 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant K has long distance to both influent and process tank suggesting community is a mixture of both
dpcaL <- amp_subset_samples(d, Plant %in% c("WWTP L"))
## 389 samples and 17456 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 23 samples and 29654 OTUs
dpcaL <- amp_subset_samples(dpcaL, Location %in% c("Influent", "Process tank", "Effluent"))
## 5 samples and 8145 OTUs have been filtered
## Before: 23 samples and 29654 OTUs
## After: 18 samples and 21509 OTUs
drpcaL <- amp_subset_samples(dpcaL, minreads = 10000)
## 0 samples have been filtered.
amp_ordinate(drpcaL,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayL <- read.csv("statistics/L/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesL <- read.csv("statistics/L/infeff/metadataanonymised_L_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayL)
## [1] 2608 12
dim(metadata.samplesL)
## [1] 12 14
t <- t(asvtable.samples.brayL)
braycurtis.d=vegdist(t(asvtable.samples.brayL), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesL)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesL)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.3744 1.37444 12.143 0.54838 0.003 **
## Residuals 10 1.1319 0.11319 0.45162
## Total 11 2.5064 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayL <- read.csv("statistics/L/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesL <- read.csv("statistics/L/proeff/metadataanonymised_L_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayL)
## [1] 2858 12
dim(metadata.samplesL)
## [1] 12 14
t <- t(asvtable.samples.brayL)
braycurtis.d=vegdist(t(asvtable.samples.brayL), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesL)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesL)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.57923 1.57923 15.912 0.61408 0.002 **
## Residuals 10 0.99248 0.09925 0.38592
## Total 11 2.57171 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant L has long distance to both influent and process tank suggesting community is a mixture of both
dpcaM <- amp_subset_samples(d, Plant %in% c("WWTP M"))
## 389 samples and 20287 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 23 samples and 26823 OTUs
dpcaM <- amp_subset_samples(dpcaM, Location %in% c("Influent", "Process tank", "Effluent"))
## 5 samples and 7229 OTUs have been filtered
## Before: 23 samples and 26823 OTUs
## After: 18 samples and 19594 OTUs
drpcaM <- amp_subset_samples(dpcaM, minreads = 10000)
## 3 samples and 148 OTUs have been filtered
## Before: 18 samples and 19594 OTUs
## After: 15 samples and 19446 OTUs
amp_ordinate(drpcaM,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayM <- read.csv("statistics/M/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesM <- read.csv("statistics/M/infeff/metadataanonymised_M_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayM)
## [1] 3344 12
dim(metadata.samplesM)
## [1] 12 14
t <- t(asvtable.samples.brayM)
braycurtis.d=vegdist(t(asvtable.samples.brayM), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesM)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesM)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.3064 1.30640 6.9138 0.40877 0.002 **
## Residuals 10 1.8895 0.18895 0.59123
## Total 11 3.1959 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayM <- read.csv("statistics/M/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesM <- read.csv("statistics/M/proeff/metadataanonymised_M_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayM)
## [1] 2207 12
dim(metadata.samplesM)
## [1] 12 14
t <- t(asvtable.samples.brayM)
braycurtis.d=vegdist(t(asvtable.samples.brayM), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesM)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesM)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.4369 1.43695 8.9908 0.47343 0.003 **
## Residuals 10 1.5982 0.15983 0.52657
## Total 11 3.0352 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant M has long distance to both influent and process tank suggesting community is a mixture of both
dpcaN <- amp_subset_samples(d, Plant %in% c("WWTP N"))
## 389 samples and 17371 OTUs have been filtered
## Before: 412 samples and 47110 OTUs
## After: 23 samples and 29739 OTUs
dpcaN <- amp_subset_samples(dpcaN, Location %in% c("Influent", "Process tank", "Effluent"))
## 5 samples and 10648 OTUs have been filtered
## Before: 23 samples and 29739 OTUs
## After: 18 samples and 19091 OTUs
drpcaN <- amp_subset_samples(dpcaN, minreads = 10000)
## 0 samples have been filtered.
amp_ordinate(drpcaN,
type = "pca",
distmeasure = "bray",
sample_color_by = "Location",
sample_colorframe = TRUE,
sample_colorframe_label = "Location",
sample_colorframe_label_size = 7,
sample_label_size = 7)+theme_cowplot()+theme(legend.position="none")
asvtable.samples.brayN <- read.csv("statistics/N/infeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesN <- read.csv("statistics/N/infeff/metadataanonymised_N_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayN)
## [1] 2387 12
dim(metadata.samplesN)
## [1] 12 14
t <- t(asvtable.samples.brayN)
braycurtis.d=vegdist(t(asvtable.samples.brayN), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesN)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesN)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.26325 1.26325 19.379 0.65962 0.005 **
## Residuals 10 0.65187 0.06519 0.34038
## Total 11 1.91512 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
asvtable.samples.brayN <- read.csv("statistics/N/proeff/ASVtable_MIDAS.tsv", header=TRUE, sep = "\t", check.names = FALSE)
metadata.samplesN <- read.csv("statistics/N/proeff/metadataanonymised_N_bray.txt", sep = "\t", check.names = FALSE)
dim(asvtable.samples.brayN)
## [1] 2419 12
dim(metadata.samplesN)
## [1] 12 14
t <- t(asvtable.samples.brayN)
braycurtis.d=vegdist(t(asvtable.samples.brayN), method="bray")
adonis(formula = braycurtis.d ~ Location, data = metadata.samplesN)
##
## Call:
## adonis(formula = braycurtis.d ~ Location, data = metadata.samplesN)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 1.47231 1.47231 28.038 0.73711 0.004 **
## Residuals 10 0.52511 0.05251 0.26289
## Total 11 1.99742 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plant N has long distance to both influent and process tank suggesting community is a mixture of both
The average abundance of the 25 most abundant genera in the influent (n=6) is shown along with their relative abundances in process tank and effluent for all WWTPs sampled during 3 months.
d_pro_sup_eff <- amp_subset_samples(dnumbers, Plant %in% c("WWTP A","WWTP B","WWTP C","WWTP G","WWTP D","WWTP E","WWTP F","WWTP H", "WWTP I", "WWTP J", "WWTP K", "WWTP L", "WWTP M", "WWTP N"))
d_pro_sup_eff1 <- amp_subset_samples(d_pro_sup_eff, Location %in% c("1_Influent","2_Process tank", "6_Effluent"))
d_inf1 <- amp_subset_samples(d_pro_sup_eff, Location %in% c("1_Influent"))
inftaxa2 <- amp_heatmap(d_inf1,
group_by = "Type",
tax_aggregate = "OTU",
tax_show = 200,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
max_abundance = 30,
min_abundance = 0.1,
round = 1)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 20, color = "black"),
legend.position = "none")
generainf <- amp_heatmap(d_inf1,
group_by = "Type",
tax_aggregate = "Genus",
tax_show = 20000,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
max_abundance = 30,
min_abundance = 0.1,
round = 1)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 20, color = "black"),
legend.position = "none")
listgenera <- generainf$data$Display
write.csv(listgenera,"listgenera.csv")
top25inf <- amp_heatmap(d_inf1,
group_by = c("Plant", "Week"),
tax_aggregate = "Genus",
tax_show = 10,
plot_colorscale = "sqrt",
plot_values_size = 4,
color_vector = c("White", "Red"),
max_abundance = 30,
round = 1)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 20, color = "black"),
legend.position = "none")
yaverageinfluent_order <- top25inf$data$Display %>% droplevels() %>% levels()
location <- c(
`1_Influent` = "Influent",
`2_Process tank` = "Process tank",
`6_Effluent` = "Effluent"
)
figure2 <-amp_heatmap(d_pro_sup_eff1,
group_by = c("Location", "Plant"),
tax_aggregate = "Genus",
tax_show = yaverageinfluent_order,
order_y = yaverageinfluent_order,
plot_colorscale = "sqrt",
plot_values_size = 2,
plot_values = TRUE,
color_vector = c("White", "Red"),
max_abundance = 30) +
theme(axis.text.x = element_text(size = 7, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 7, color = "black"),
strip.text = element_text(size = 9, color = "black"),
legend.position = "none", legend.title=element_text(size=15), legend.text=element_text(size = 14))+
facet_wrap(~Location, scales = "free_x", labeller = as_labeller(location))
## changing labels
original_labelseff<-levels(figure2$data$Group)
new_labelseff <- (sub("1_","", original_labelseff))
new_labelseff <- (sub("2_","", new_labelseff))
new_labelseff <- (sub("6_","", new_labelseff))
new_labelseff <- (sub("tank","", new_labelseff))
new_labelseff <- (sub(".*? ", "", new_labelseff))
figure2$data$Group.label<-mapvalues(figure2$data$Group,original_labelseff,c(new_labelseff))
figure2<-figure2+aes(x=figure2$data$Group.label)+theme(axis.title.x=element_blank(), strip.text.x = element_text(margin = margin(.2, 0, .2, 0, "cm")))
figure2
ggsave(filename = "fig2a.pdf",plot = figure2,width = 28,height = 8)
ggsave(filename = "fig2a.png",plot = figure2,width = 28,height = 8)
ggsave(filename = "fig2a.tiff",plot = figure2,width = 8.5,height = 3)
Figure 2b shows the average abundance of the 25 most abundant genera in the process tanks (n=6) along with their relative abundances in influent and effluent for all WWTPs sampled during 3 months.
d_pro1 <- amp_subset_samples(d_pro_sup_eff, Location %in% c("2_Process tank"))
top25pro <- amp_heatmap(d_pro1,
group_by = c("Plant", "Week"),
tax_aggregate = "Genus",
tax_show = 10,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
max_abundance = 30,
round = 1)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 20, color = "black"),
legend.position = "none")
yaveragepro_order <- top25pro$data$Display %>% droplevels() %>% levels()
location <- c(
`1_Influent` = "Influent",
`2_Process tank` = "Process tank",
`6_Effluent` = "Effluent"
)
figur2b <-amp_heatmap(d_pro_sup_eff1,
group_by = c("Location", "Plant"),
tax_aggregate = "Genus",
tax_show = yaveragepro_order,
order_y = yaveragepro_order,
plot_colorscale = "sqrt",
plot_values_size = 2,
plot_values = TRUE,
color_vector = c("White", "Red"),
max_abundance = 30) +
theme(axis.text.x = element_text(size = 7, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 7, color = "black"),
strip.text = element_text(size = 9, color = "black"),
legend.position = "none", legend.title=element_text(size=15), legend.text=element_text(size = 14))+
facet_wrap(~Location, scales = "free_x", labeller = as_labeller(location))
## changing labels
original_labelseff<-levels(figur2b$data$Group)
new_labelseff <- (sub("1_","", original_labelseff))
new_labelseff <- (sub("2_","", new_labelseff))
new_labelseff <- (sub("6_","", new_labelseff))
new_labelseff <- (sub("tank","", new_labelseff))
new_labelseff <- (sub(".*? ", "", new_labelseff))
figur2b$data$Group.label<-mapvalues(figur2b$data$Group,original_labelseff,c(new_labelseff))
figur2b<-figur2b+aes(x=figur2b$data$Group.label)+theme(axis.title.x=element_blank(), strip.text.x = element_text(margin = margin(.2, 0, .2, 0, "cm")))
figur2b
ggsave(filename = "fig2b.pdf",plot = figur2b,width = 28,height = 8)
ggsave(filename = "fig2b.png",plot = figur2b,width = 28,height = 8)
ggsave(filename = "fig2b.tiff",plot = figur2b,width = 8.5,height = 3)
Figure 2c shows the average abundance of the 25 most abundant genera in the effluents (n=6) along with their relative abundances in influent and process tanks for all WWTPs sampled during 3 months.
d_eff1 <- amp_subset_samples(d_pro_sup_eff, Location %in% c("6_Effluent"))
top25eff <- amp_heatmap(d_eff1,
group_by = c("Plant", "Week"),
tax_aggregate = "Genus",
tax_show = 10,
plot_colorscale = "sqrt",
plot_values_size = 2,
color_vector = c("White", "Red"),
max_abundance = 30,
round = 1)+
theme(axis.text.x = element_text(size = 7, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 7, color = "black"),
strip.text = element_text(size = 7, color = "black"),
legend.position = "none")
yaverageeff_order <- top25eff$data$Display %>% droplevels() %>% levels()
location <- c(
`1_Influent` = "Influent",
`2_Process tank` = "Process tank",
`6_Effluent` = "Effluent"
)
figur2c <-amp_heatmap(d_pro_sup_eff1,
group_by = c("Location", "Plant"),
tax_aggregate = "Genus",
tax_show = yaverageeff_order,
order_y = yaverageeff_order,
plot_colorscale = "sqrt",
plot_values_size = 2,
plot_values = TRUE,
color_vector = c("White", "Red"),
max_abundance = 30) +
theme(axis.text.x = element_text(size = 7, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 7, color = "black"),
strip.text = element_text(size = 9, color = "black"),
legend.position = "none", legend.title=element_text(size=15), legend.text=element_text(size = 14))+
facet_wrap(~Location, scales = "free_x", labeller = as_labeller(location))
## changing labels
original_labelseff<-levels(figur2b$data$Group)
new_labelseff <- (sub("1_","", original_labelseff))
new_labelseff <- (sub("2_","", new_labelseff))
new_labelseff <- (sub("6_","", new_labelseff))
new_labelseff <- (sub("tank","", new_labelseff))
new_labelseff <- (sub(".*? ", "", new_labelseff))
figur2c$data$Group.label<-mapvalues(figur2c$data$Group,original_labelseff,c(new_labelseff))
figur2c<-figur2c+aes(x=figur2c$data$Group.label)+theme(axis.title.x=element_blank(), strip.text.x = element_text(margin = margin(.2, 0, .2, 0, "cm")))
figur2c
ggsave(filename = "fig2c.pdf",plot = figur2c,width = 28,height = 8)
ggsave(filename = "fig2c.png",plot = figur2c,width = 28,height = 8)
ggsave(filename = "fig2c.tiff",plot = figur2c,width = 8.7,height = 3)
The most abundant genera of the influent, process tank and effluent over time for the wastewater treatment plant A. Heatmap for the fate of top 10 most abundant incoming bacteria there among Arcobacter.
d_inf <- amp_subset_samples(dpca, Location %in% c("Influent"))
dn_aaw_inf <- amp_subset_samples(d_inf, Plant %in% c("WWTP A"))
top10infaaw <- amp_heatmap(dn_aaw_inf,
group_by = c("Plant", "Week"),
tax_aggregate = "Genus",
tax_show = 10,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
max_abundance = 30,
round = 1)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 20, color = "black"),
legend.position = "none")
d_aaw <- amp_subset_samples(dnumbers, Plant %in% c("WWTP A"))
d_aaw <- amp_subset_samples(d_aaw, Location %in% c("1_Influent", "2_Process tank", "6_Effluent"))
yaverageinfluent_orderaaw <- top10infaaw$data$Display %>% droplevels() %>% levels()
poster_gen <- amp_heatmap(data = d_aaw,
group_by = c("Location","Week"),
tax_aggregate = "Genus",
tax_show = yaverageinfluent_orderaaw,
order_y = yaverageinfluent_orderaaw,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
max_abundance = 30) +
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 20, color = "black"),
legend.position = "none", legend.title=element_text(size=15), legend.text=element_text(size = 14))+
facet_wrap(~Location, scales = "free_x", labeller = as_labeller(location))
original_labelseff<-levels(poster_gen$data$Group)
new_labelseff <- (sub("1_","", original_labelseff))
new_labelseff <- (sub("2_","", new_labelseff))
new_labelseff <- (sub("6_","", new_labelseff))
new_labelseff <- toupper(sub(".*? ", "", new_labelseff))
new_labelseff <- (sub("TANK ","", new_labelseff))
poster_gen$data$Group.label<-mapvalues(poster_gen$data$Group,original_labelseff,c(new_labelseff))
poster_gen<-poster_gen+aes(x=poster_gen$data$Group.label)+theme(axis.title.x=element_blank(), strip.text.x = element_text(margin = margin(.1, 0, .1, 0, "cm")))
poster_gen
ggsave(filename = "poster.pdf",plot = poster_gen,width = 13,height = 4)
ggsave(filename = "poster.png",plot = poster_gen,width = 13,height = 4)
ggsave(filename = "poster.tiff",plot = poster_gen,width = 13,height = 4)
Average abundance of most abundant ASVs in the influents of 14 Danish Wastewater treatment plants
#The number of ASVs in influent total
dn_infs1 <- amp_subset_samples(dnumbers, Plant %in% c("WWTP A","WWTP B","WWTP C","WWTP G","WWTP D","WWTP E","WWTP F","WWTP H", "WWTP I", "WWTP J", "WWTP K", "WWTP L", "WWTP M", "WWTP N"))
dn_infs1 <- amp_subset_samples(dn_infs1, Location %in% c("1_Influent"))
list_OTUs <- amp_heatmap(data = dn_infs1,
tax_aggregate = "OTU",
tax_add = "Genus",
tax_show = 250000,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
max_abundance = 30,
round = 1)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 20, color = "black"),
legend.position = "none")
otus <- list_OTUs$data$Display
influent_OTUs <-amp_heatmap(data = dn_infs1,
group_by = c("Plant","Date"),
tax_aggregate = "Species",
tax_add = "Genus",
tax_show = 10,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
max_abundance = 30,
round = 1)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 20, color = "black"),
legend.position = "right", legend.title=element_text(size=15), legend.text=element_text(size = 14))+
facet_wrap(~Plant, scales = "free_x")
## changing labels
original_labels<-levels(influent_OTUs$data$Group)
new_labels <- toupper(sub(".*? ", "", original_labels))
new_labels <- (sub("A ","", new_labels))
new_labels <- (sub("B ","", new_labels))
new_labels <- (sub("C ","", new_labels))
new_labels <- (sub("D ","", new_labels))
new_labels <- (sub("E ","", new_labels))
new_labels <- (sub("F ","", new_labels))
new_labels <- (sub("G ","", new_labels))
new_labels <- (sub("H ","", new_labels))
new_labels <- (sub("I ","", new_labels))
new_labels <- (sub("J ","", new_labels))
new_labels <- (sub("K ","", new_labels))
new_labels <- (sub("L ","", new_labels))
new_labels <- (sub("M ","", new_labels))
new_labels <- (sub("N ","", new_labels))
influent_OTUs$data$Group.label<-mapvalues(influent_OTUs$data$Group,original_labels,c(new_labels))
influent_OTUs<-influent_OTUs+aes(x=influent_OTUs$data$Group.label)+theme(axis.title.x=element_blank(), strip.text.x = element_text(margin = margin(.2, 0, .2, 0, "cm")))
influent_OTUs
ggsave(filename = "s1.pdf",plot = influent_OTUs,width = 25,height = 20)
ggsave(filename = "s1.png",plot = influent_OTUs,width = 25,height = 20)
Heatmaps with timepoints for all 14 wastewater treatment plants
dn_overview14 <- amp_subset_samples(dnumbers, Plant %in% c("WWTP A","WWTP B","WWTP C","WWTP G","WWTP D","WWTP E","WWTP F","WWTP H", "WWTP I", "WWTP J", "WWTP K", "WWTP L", "WWTP M", "WWTP N"))
dn_overview14 <- amp_subset_samples(dn_overview14, Location %in% c("1_Influent", "2_Process tank", "6_Effluent"))
Overview_genera <-amp_heatmap(data = dn_overview14,
group_by = c("Plant","Location","Date"),
tax_aggregate = "Genus",
tax_show = 10,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
max_abundance = 30,
round = 1)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 24, color = "black"),
legend.position = "right")+
facet_wrap(~Plant, scales = "free_x")
## changing labels
original_labelseff<-levels(Overview_genera$data$Group)
#new_labelseff <- (sub(".*? ", "", original_labelseff))
new_labelseff <- (sub("1_","", original_labelseff))
new_labelseff <- (sub("2_","", new_labelseff))
new_labelseff <- (sub("6_","", new_labelseff))
#new_labelseff <- (sub("WWTP A ","", original_labelseff))
#new_labelseff <- (sub("WWTP B ","", new_labelseff))
#new_labelseff <- (sub("WWTP C ","", new_labelseff))
#new_labelseff <- (sub("WWTP D ","", new_labelseff))
#new_labelseff <- (sub("WWTP E ","", new_labelseff))
#new_labelseff <- (sub("WWTP F ","", new_labelseff))
#new_labelseff <- (sub("WWTP G ","", new_labelseff))
#new_labelseff <- (sub("WWTP H ","", new_labelseff))
#new_labelseff <- (sub("WWTP I ","", new_labelseff))
#new_labelseff <- (sub("WWTP J ","", new_labelseff))
#new_labelseff <- (sub("WWTP K ","", new_labelseff))
#new_labelseff <- (sub("WWTP L ","", new_labelseff))
#new_labelseff <- (sub("WWTP M ","", new_labelseff))
#new_labelseff <- (sub("WWTP N ","", new_labelseff))
Overview_genera$data$Group.label<-mapvalues(Overview_genera$data$Group,original_labelseff,c(new_labelseff))
Overview_genera<-Overview_genera+aes(x=Overview_genera$data$Group.label)+theme(axis.title.x=element_blank(), strip.text.x = element_text(margin = margin(.2, 0, .2, 0, "cm")))
Overview_genera
ggsave(filename = "s2.pdf",plot = Overview_genera,width = 45,height = 30)
ggsave(filename = "s2.png",plot = Overview_genera,width = 45,height = 30)
Figure showing how dilution and filtration of activated sludge does not affect the community composition by diluting to influent and effluent cell concentation before filtration with 0.2 um polycarbonate filters.
#dcontrols_dilution <- amp_subset_samples(dcontrols, SampleID %in% c("16SAMP-8984","16SAMP-8985","16SAMP-8986","16SAMP-8990","16SAMP-8991","16SAMP-8992","16SAMP-8993","16SAMP-8994"))
dcontrols_dilution <- amp_subset_samples(dcontrols, SampleID %in% c("16SAMP-7603","16SAMP-7604","16SAMP-7605","16SAMP-8984","16SAMP-8985","16SAMP-8986","16SAMP-8990","16SAMP-8991","16SAMP-8992","16SAMP-8993","16SAMP-8994"))
s3 <- amp_heatmap(data = dcontrols_dilution,
group_by = c("Plant","Location","Date"),
tax_aggregate = "Genus",
tax_show = 10,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
max_abundance = 30,
round = 1)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 24, color = "black"),
legend.position = "right")
ggsave(filename = "s3.pdf",plot = s3,width = 9,height = 8)
ggsave(filename = "s3.png",plot = s3,width = 9,height = 8)
s3
Analysing the waterphase of the sludge, compared to the microbial community of the total sludge sample.
dn_overview14 <- amp_subset_samples(dnumbers, Plant %in% c("WWTP A","WWTP B","WWTP C","WWTP G","WWTP D","WWTP E","WWTP F","WWTP H", "WWTP I", "WWTP J", "WWTP K", "WWTP L", "WWTP M", "WWTP N"))
waterphase <- amp_subset_samples(dn_overview14, Location %in% c("1_Influent","4_Sludge Supernatant","2_Process tank", "6_Effluent"))
waterphase345 <- amp_subset_samples(dn_overview14, Location %in% c("3_Total Sludge","4_Sludge Supernatant"))
location <- c(
`3_Total Sludge`= "Total Sludge",
`4_Sludge Supernatant` = "Sludge Supernatant" )
s4a <-amp_heatmap(waterphase345,
group_by = c("Location", "Plant"),
tax_aggregate = "Genus",
tax_show = 10,
plot_colorscale = "sqrt",
plot_values_size = 6,
plot_values = TRUE,
color_vector = c("White", "Red"),
max_abundance = 30) +
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 25, color = "black"),
legend.position = "right", legend.title=element_text(size=15), legend.text=element_text(size = 14))+
facet_wrap(~Location, scales = "free_x", labeller = as_labeller(location))
original_labelsarco<-levels(s4a$data$Group)
#new_labelsarco <- (sub(".*? ", "", original_labelsarco))
new_labelsarco <- (sub("3_","", original_labelsarco))
new_labelsarco <- (sub("4_","", new_labelsarco))
new_labelsarco <- (sub("5_","", new_labelsarco))
new_labelsarco <- (sub("Total Sludge ","", new_labelsarco))
new_labelsarco <- (sub("Sludge Supernatant ","", new_labelsarco))
new_labelsarco <- (sub("Supernatant After Shear ","", new_labelsarco))
s4a$data$Group.label<-mapvalues(s4a$data$Group,original_labelsarco,c(new_labelsarco))
s4a<-s4a+aes(x=s4a$data$Group.label)+theme(axis.title.x=element_blank(), strip.text.x = element_text(margin = margin(.2, 0, .2, 0, "cm")))
ggsave(filename = "s4a.pdf",plot = s4a,width = 25,height = 6)
ggsave(filename = "s4a.png",plot = s4a,width = 25,height = 6)
s4aa <- amp_boxplot(waterphase345, group_by = "Location", sort_by = "median",
plot_type = "boxplot", point_size = 1, tax_aggregate = "Genus",
tax_add = NULL, tax_show = 10, tax_empty = "best",
tax_class = NULL, order_group = NULL, order_y = NULL,
plot_flip = FALSE, plot_log = FALSE, adjust_zero = NULL,
normalise = TRUE, detailed_output = FALSE) + geom_boxplot(lwd=1) + theme(axis.text.y = element_text(size = 16, color = "black"), axis.text.x = element_text(size = 14, color = "black"), axis.title.x = element_text(size = 14, color = "black"), legend.position = "right", legend.title=element_text(size=14), legend.text=element_text(size = 14))
ggsave(filename = "4aa.pdf",plot = s4aa,width = 10,height = 7)
ggsave(filename = "4aa.png",plot = s4aa,width = 10,height = 7)
s4aa
The abundance of the 15 most abundant species-level Arcobacter
dn_overview14 <- amp_subset_samples(dnumbers, Plant %in% c("WWTP A","WWTP B","WWTP C","WWTP G","WWTP D","WWTP E","WWTP F","WWTP H", "WWTP I", "WWTP J", "WWTP K", "WWTP L", "WWTP M", "WWTP N"))
dn_overview14a <- amp_subset_samples(dn_overview14, Location %in% c("1_Influent", "2_Process tank", "6_Effluent"))
Arcobacter <- amp_subset_taxa(dn_overview14a, tax_vector=c("g__Arcobacter"), normalise = TRUE)
location <- c(
`1_Influent` = "Influent",
`2_Process tank` = "Process tank",
`6_Effluent` = "Effluent"
)
fig5 <-amp_heatmap(data = Arcobacter,
group_by = c("Plant","Location"),
tax_aggregate = "Species",
tax_show = 15,
plot_colorscale = "sqrt",
plot_values_size = 6,
color_vector = c("White", "Red"),
normalise = FALSE,
max_abundance = 30,
round = 2)+
theme(axis.text.x = element_text(size = 18, color = "black", vjust = 0.5),
axis.text.y = element_text(size = 18, color = "black"),
strip.text = element_text(size = 24, color = "black"),
legend.position = "none", legend.title=element_text(size=15), legend.text=element_text(size = 14))+
facet_wrap(~Location, scales = "free_x", labeller = as_labeller(location))
original_labelsarco<-levels(fig5$data$Group)
#new_labelsarco <- (sub(".*? ", "", original_labelsarco))
new_labelsarco <- (sub("1_","", original_labelsarco))
new_labelsarco <- (sub("2_","", new_labelsarco))
new_labelsarco <- (sub("6_","", new_labelsarco))
new_labelsarco <- (sub("Influent","", new_labelsarco))
new_labelsarco <- (sub("Process tank","", new_labelsarco))
new_labelsarco <- (sub("Effluent","", new_labelsarco))
fig5$data$Group.label<-mapvalues(fig5$data$Group,original_labelsarco,c(new_labelsarco))
fig5<-fig5+aes(x=fig5$data$Group.label)+theme(axis.title.x=element_blank(), strip.text.x = element_text(margin = margin(.2, 0, .2, 0, "cm")))
ggsave(filename = "fig5.pdf",plot = fig5,width = 29,height = 7)
ggsave(filename = "fig5.png",plot = fig5,width = 29,height = 7)
fig5
Standard example of biomass calculation. Number of cells in each sample was calculated based on the cell number found using (Saunders et al. 2015) method and then were converted into the mass of bacteria measured in SS/L and then converted to VSS in a given sample. The following standard example is from wastewater treatment plant A at the sampling date 09-11-2014, For the general example in this study, the parameters are determined for a ‘typical Danish treatment plant’ with biological nutrient removal similar to what is done in Saunders et al. (2015).
Vinfluent = 41560 #Volume influent [m3 day-1]
SS_in = 0.085 #[g/L] Concentration of SS in the influent
SS_AS = 3.4 #[g/L] Concentration of SS in the process tank
SS_OUT = 0.0015 #[g/L] Concentration of SS in the effluent
vSS_in = SS_in*0.70 #[g/L] conversion of SS to VSS (Frølund et al. 1996)
vSS_AS = SS_AS*0.70 #[g/L]
vSS_OUT = SS_OUT*0.70 #[g/L]
cells_pr_gram_vss = 5*10^11 #[Cells/g VSS] Frølund et al. 1996
Nbacteria_influent <- 1.4*10^14 #Cells pr. m3 # 1.4*10^14 = a total abundance of organisms in the wastewater was assumed equal to previous reports for wastewater from the same region (Vollertsen et al. 2001)
nWW = Vinfluent * 1.4*10^14 #Cells in influent pr day
nWW
## [1] 5.8184e+18
gram_vss_incoming_pr_day <- nWW / cells_pr_gram_vss #Gram VSS pr. day
ton_vss_incoming_pr_day <- gram_vss_incoming_pr_day / 10^6 #Ton VSS pr. day
ton_vss_incoming_pr_day
## [1] 11.6368
nCells_pr_L <- vSS_AS*cells_pr_gram_vss
nCells_pr_L #Number of cells per L activated sludge
## [1] 1190000000000
VSS_AS <- 0.70*SS_AS #VSS [g/L]
gram_total_VSS_AS <- (Vinfluent*1000)*VSS_AS # Total gram VSS in activated sludge [g]
Mass_AS <- gram_total_VSS_AS / 10^6 #Total ton VSS
#Volumes of process tanks unknown therefore we take the assume a HRT of the water in the process tank is similar to the amount of influent coming pr day
Mass_AS #Total ton VSS
## [1] 98.9128
nAS <- nCells_pr_L * 1000 * 39480 #39480 [m3] is the volume of the process tanks in WWTP A
#Flow (Q) = Flow(Influent) - Flow(Surplus sludge) = 0.97 * 41.56 * 103 m3 day-1
#Effluent SS: (1-5 mg SS L-1, here used 1.5 mg L-1): 1.5 mg SS L-1 = 1.5 * 10-3 g SS m-3
nCells_pr_L_effluent <- SS_OUT*0.7*cells_pr_gram_vss
nCells_pr_L_effluent # A correction for the amount of vSS in effluent in this case we have aprox 5.2*10^8 Cells/L
## [1] 525000000
gram_vss_effluent_pr_day <- nWW / cells_pr_gram_vss #Gram VSS pr. day
ton_vss_incoming_pr_day <- gram_vss_incoming_pr_day / 10^6 #Ton VSS pr. day
ton_vss_incoming_pr_day
## [1] 11.6368
effluentVolume <- (Vinfluent*97/100) #97% of influent water volume
gram_VSS_eff <- (effluentVolume*1000) * vSS_OUT # Total gram VSS in effluent [g]
VSS_eff <- gram_VSS_eff / 10^3 #Total Kg VSS
VSS_eff
## [1] 42.32886
E = (ton_vss_incoming_pr_day-(VSS_eff/1000))/ton_vss_incoming_pr_day*100 #(Henze et al. 2002)
E
## [1] 99.63625
infab = 14.1 #16S abundance of *Arcobacter* in influent
ASab = 0.9 #16S abundance of *Arcobacter* in activated sludge
effab = 14.5 #16S abundance of *Arcobacter* in effluent
cellsarcoinf = infab/100*ton_vss_incoming_pr_day
cellsarcoeff = effab/100*VSS_eff
Earco = (cellsarcoinf-(cellsarcoeff/1000))/cellsarcoinf*100 #(Henze et al. 2002)
Earco
## [1] 99.62593
Arcoinf = (infab/100)*(1.4*10^14/1000)
Arcoinf
## [1] 19740000000
Arcopro = (ASab/100) * nCells_pr_L
Arcopro
## [1] 10710000000
Arcoeff = (effab/100)*nCells_pr_L_effluent
Arcoeff
## [1] 76125000
To determine if bacteria abundant in the influent have active net growth in process tanks the net growth rate constants are calculated for top 10 genera in figure 2 as described in Saunders et al. 2016.
# K = (pAS * nSP - pWW * nWW) / (pAS * nAS) Saunders et al. 2016
nSP = nAS / 35
col_a <- c("Trichococcus", "Streptococcus", "Arcobacter", "Blautia", "Acidovorax", "Lactococcus", "Subdogranulum", "Enterococcus", "Leptotrichia", "Comamonas")
col_b <- c((3.5 * nSP - 5.1 * nWW) / (3.5 *nAS), (1.4 * nSP - 11.9 * nWW) / (1.4 *nAS), (0.6 * nSP - 15.3 * nWW) / (0.6 *nAS), (0.4 * nSP - 3.9 * nWW) / (0.4 *nAS), (0.8 * nSP - 3.4 * nWW) / (0.8 *nAS), (0.2 * nSP - 0.7 * nWW) / (0.2 *nAS), (0.2 * nSP - 1.5 * nWW) / (0.2 *nAS), (0.1 * nSP - 0.6 * nWW) / (0.1 *nAS), (0.2 * nSP - 0.8 * nWW) / (0.2 *nAS), (0.3 * nSP - 2.8 * nWW) / (0.3 *nAS) )
table <- matrix(col_b,ncol=1,byrow=TRUE)
colnames(table) <- c("Net growth rate constant")
rownames(table) <- c(col_a)
table <- as.table(table)
table
## Net growth rate constant
## Trichococcus -0.1518888
## Streptococcus -1.0241135
## Arcobacter -3.1294833
## Blautia -1.1789201
## Acidovorax -0.4977710
## Lactococcus -0.4048871
## Subdogranulum -0.9002682
## Enterococcus -0.7145003
## Leptotrichia -0.4668097
## Comamonas -1.1273179
Arcobacter was detected in the influent as well as the effluent at WWTPs. Following this observation it was assessed whether these organisms were alive by isolating them from the influent and effluent wastewater from WWTP A. Viable cells were found, their DNA was extracted and sequenced. In addition samples from the influent and effluent streams were analysed using metagenome sequencing to assess whether the obtained isolates match the most abundant organisms in the wild or they represented a less abundant version. From the isolates it was also investigated if the isolated Arcobacter strains were pathogenic.
Analysing the data from this assembly revealed only 1 organism, the data is okay for further analysis
d387<- mmload(assembly = "data/NexPE-387/assembly.fasta",
coverage = "data/NexPE-387/",
essential_genes = read_csv(paste0("data/NexPE-387/essential.csv"), T),
taxonomy = read_csv("data/NexPE-387/tax.csv", T, col_types = cols("c", "c")),
kmer_pca = F,
kmer_BH_tSNE = F,kmer_size = 5,
num_threads = 2
)
d387assembly<-assembly
rm(assembly)
#d387_PEnetwork<-read.csv("data/NexPE-387/network.txt")
mmplot(mm = d387,
x = "length",
y = "cov_NexPE387",
color_by = "phylum")+expand_limits(y=0)
This genome is found to be contaminated and is therefore cleaned by differential coverage binning before further analysis.
d388<- mmload(assembly = "data/NexPE-388/assembly.fasta",
coverage = "data/NexPE-388/",
essential_genes = read_csv(paste0("data/NexPE-388/essential.csv"), T),
taxonomy = read_csv("data/NexPE-388/tax.csv", T, col_types = cols("c", "c")),
kmer_pca = F,
kmer_BH_tSNE = F,kmer_size = 5,
num_threads = 2
)
d388assembly<-assembly
rm(assembly)
#d388_PEnetwork<-read.csv("data/NexPE-388/network.txt")
sel <- data.frame(length = c(0, 52820.684, 52721.696, 8785.992, 6809.334,4),
cov_NexPE388 = c(97.292, 80.706, 357.931, 502.467, 1168.281, 1194.042))
mmplot(mm = d388,locator = F, selection = sel,
x = "length",
y = "cov_NexPE388",
color_by = "phylum"
)
mm_subset_d388 <- d388 %>% filter(cov_NexPE388>100)
mmplot(mm = d388,locator = F, selection = sel,
x = "length",highlight_scaffolds = mm_subset_d388,
y = "cov_NexPE388",
color_by = "phylum"
)
mmexport(mm_subset_d388,
assembly = d388assembly,
file = "d388clean.fa")
Analysing the data from this assembly revealed only 1 organism, the data is okay for further analysis
d389<- mmload(assembly = "data/NexPE-389/assembly.fasta",
coverage = "data/NexPE-389/",
essential_genes = read_csv(paste0("data/NexPE-389/essential.csv"), T),
taxonomy = read_csv("data/NexPE-389/tax.csv", T, col_types = cols("c", "c")),
kmer_pca = F,
kmer_BH_tSNE = F,kmer_size = 5,
num_threads = 2
)
d389assembly<-assembly
rm(assembly)
#d389_PEnetwork<-read.csv("data/NexPE-389/network.txt")
mmplot(mm = d389,
x = "length",
y = "cov_NexPE389",
color_by = "phylum")+expand_limits(y=0)
This genome is found to be contaminated and is therefore cleaned by differential coverage binning before further analysis.
d390<- mmload(assembly = "data/NexPE-390/assembly.fasta",
coverage = "data/NexPE-390/",
essential_genes = read_csv(paste0("data/NexPE-390/essential.csv"), T),
taxonomy = read_csv("data/NexPE-390/tax.csv", T, col_types = cols("c", "c")),
kmer_pca = F,
kmer_BH_tSNE = F,kmer_size = 5,
num_threads = 2
)
d390assembly<-assembly
rm(assembly)
#d390_PEnetwork<-read.csv("data/NexPE-390/network.txt")
sel <- data.frame(length = c(0, 52820.684, 52721.696, 8785.992, 6809.334,4),
cov_NexPE388 = c(97.292, 80.706, 357.931, 502.467, 1168.281, 1194.042))
mmplot(mm = d390,locator = F, selection = sel,
x = "length",
y = "cov_NexPE390",
color_by = "phylum"
)
mm_subset_d390 <- d390 %>% filter(cov_NexPE390>100)
mmplot(mm = d390,locator = F, selection = sel,
x = "length",highlight_scaffolds = mm_subset_d390,
y = "cov_NexPE390",
color_by = "phylum"
)
mmexport(mm_subset_d390,
assembly = d390assembly,
file = "d390clean.fa")
library("dplyr")
library(gridExtra)
library(grid)
checkmstats <- read_csv("data/assemblystats.csv") %>%
select(bin,
Genome_size,
GC,
Longest_scaffold,
N50_scaffolds,
#n_scaffolds,
n_contigs,
n_ambiguous_bases,
Completeness,
Contamination) %>%
mutate(GC=round(GC*100,1))
colnames(checkmstats)<-c("Genome",
"Genome\nsize",
"GC\ncontent\n(%)",
"Longest\nscaffold",
"Scaffold\nN50",
#"#\nScaffolds",
"#\nContigs",
"# Ns",
"Completeness\n(%)",
"Contamination\n(%)")
tt3 <- ttheme_default(core=list(fg_params=list(hjust=1, x = 0.95, fontsize = 8)),
colhead=list(fg_params=list(fontsize = 10)))
grid.table(checkmstats, rows= NULL, theme = tt3)
This figure show the coverage of the influent metagenome (NexPE-391) and the effluent metagenome (NexPE-364), on the influent metagenome assembly for plant A. It can be seen that almost all contigs that has assembled from the influent also can be found in the effluent, however it has lower coverage in the effluent.
d391<- mmload(assembly = "data/NexPE-391/assembly.fasta",
coverage = "data/NexPE-391/",
essential_genes = read_csv(paste0("data/NexPE-391/essential.csv"), T),
taxonomy = read_csv("data/NexPE-391/tax.csv", T, col_types = cols("c", "c")),
kmer_pca = F,
kmer_BH_tSNE = F,kmer_size = 5,
num_threads = 2
)
d391assembly<-assembly
rm(assembly)
#d391_PEnetwork<-read.csv("data/NexPE-391/network.txt")
mmplot(mm = d391,
x = "cov_NexPE364",x_scale = "log10",
y = "cov_NexPE391",y_scale = "log10",
color_by = "phylum")
This figure show the coverage of the influent metagenome (NexPE-391) and the effluent metagenome (NexPE-364), on the effluent metagenome assembly for plant A. This clearly shows a fractionated community where the effluent is split in half between what is commonly found in the influent and what is found in the process tank, emphasizing that for this wastewater treatment plant the effluent community is a combination of influent and process tank community and not only illustrating the process tank community.
d364<- mmload(assembly = "data/NexPE-364/assembly.fasta",
coverage = "data/NexPE-364/",
essential_genes = read_csv(paste0("data/NexPE-364/essential.csv"), T),
taxonomy = read_csv("data/NexPE-364/tax.csv", T, col_types = cols("c", "c")),
kmer_pca = F,
kmer_BH_tSNE = F,kmer_size = 5,
num_threads = 2
)
d364assembly<-assembly
rm(assembly)
#d364_PEnetwork<-read.csv("data/NexPE-364/network.txt")
mmplot(mm = d364,
x = "cov_NexPE391",x_scale = "log10",
y = "cov_NexPE364",y_scale = "log10",
color_by = "phylum")
Figure s4 show the coverage of the influent metagenome (NexPE-391) and the effluent metagenome (NexPE-364), on the influent metagenome assembly for plant A. on this figure the 2 most abundant Arcobacter 16s rRNA gene contigs are highlighted and their closest known taxa found with NCBI-Blast is written. The Arcobacter 16S with most coverage is the same as the one with most abundance found by amplicons. The coverage however is lower in effluent than in influent.
d391$taxonomy<-NA
d391$taxonomy[which(d391$scaffold==1895)]<-"16S rRNA - Arcobacter cryaerophilus"
d391$taxonomy[which(d391$scaffold==4178)]<-"16S rRNA - Arcobacter suis"
d391$taxonomy[which(d391$scaffold==66159)]<-"Virulence gene - pldA"
d391$taxonomy[which(d391$scaffold==3116)]<-"Virulence gene - tlyA"
d391$taxonomy[which(d391$scaffold==17851)]<-"Virulence gene - tlyA"
d391$taxonomy[which(d391$scaffold==45727)]<-"Virulence gene - tlyA"
mmplot(mm = d391,
x = "cov_NexPE364",x_scale = "log10",
y = "cov_NexPE391",y_scale = "log10",
color_by = "phylum",
highlight_scaffolds = c("1895", "4178","66159","3116", "17851","45727"),
label_scaffolds = c("1895", "4178","66159","3116", "17851","45727"),label_scaffolds_by = "taxonomy") + xlab("cov metagenome Effluent")+ ylab("cov metagenome Influent") + labs("cov *Arcobacter* genome 1")
ggsave(filename = "d391_warcobacter.png",width = 10,height = 10)
mmplot(mm = d391,
x = "cov_NexPE364",x_scale = "log10",
y = "cov_NexPE391",y_scale = "log10",
color_by = "cov_NexPE387",
highlight_scaffolds = c("1895", "4178", "66159","3116", "17851","45727"),
label_scaffolds = c("1895", "4178","66159","3116", "17851","45727")) + xlab("cov metagenome Effluent")+ ylab("cov metagenome Influent") + labs("cov *Arcobacter* genome 1")